*****************************
* 1. CREATE FAMILY INFORMATION *
*****************************

* This do-file prepares family data, i.e. it gathers information on immigrants' family members.
* The result is used later to exclude those who had an (adult) household member
* or a sibling in the country when they arrived in Sweden.

* Set working directory, close any open log file, and turn off -more- paging

* Session setup: close a log if one is still open (capture ignores the error
* when none is), disable output paging, empty memory, and move to the project
* folder that the relative data paths below are resolved against.
capture log close
set more off
clear all
cd "D:\SCB_ConPol\Stata"


* THE FOLLOWING LOOP PICKS OUT THOSE WITH HOUSEHOLD MEMBERS IN THE COUNTRY FOR ARRIVAL YEARS 1985-1989.
* THIS IS DONE SEPARATELY FROM 1990-1994, SINCE THE
* HOUSEHOLD ID VARIABLE ONLY EXISTS FROM 1990

foreach y in 85 86 87 88 89 {

	* Purpose: for each arrival year y in 1985-1989, flag immigrants who in the
	* 1990 household data share a household with someone who did not arrive in
	* the same year and did not arrive later than year y.
	* Output: one file per year with LopNr and anhorig (1 = family in the country).

	* ---- Step 1: immigrants of year y, one row per person ----

	* Open the file with all migrations. The clear option makes the load safe
	* even if the data in memory have unsaved changes.
	use "RTB\migrationer", clear

	* Keep immigrations, drop emigrations. tab with gen() creates one dummy per
	* PostTyp category in sorted order and the code relies on m1 marking
	* immigrations.
	* NOTE(review): assumes PostTyp has exactly two categories and that the
	* immigration category sorts first - confirm against the register codebook.
	tab PostTyp, gen(m)
	keep if m1==1
	drop PostTyp m1 m2

	* Year of arrival = characters 1-4 of Datum, month = characters 5-6.
	tostring Datum, replace
	gen im_year = substr(Datum, 1, 4)
	destring im_year, replace
	gen im_month = substr(Datum, 5, 2)
	destring im_month, replace

	keep if im_year==19`y'

	* A small share immigrated twice during the year; keep the record from the
	* latest immigration month.
	duplicates tag LopNr, gen(tag)
	bysort LopNr: egen latestmonth = max(im_month)
	drop if im_month!=latestmonth & tag>0
	drop tag latestmonth

	* Two immigrations in the same month survive the step above and would make
	* the 1:m merge below stop with an error. Drop such persons, exactly as the
	* 1990-1994 loop and the sibling loop do.
	bysort LopNr: egen cID = count(LopNr)
	drop if cID>1
	drop cID
	duplicates report LopNr

	* ---- Step 2: attach the 1990 household information ----

	* Result holds year-y immigrants (im_year filled in) plus everybody else in
	* the 1990 file (im_year missing).
	merge 1:m LopNr using "LISA\Lisa_1990", nogenerate

	* No household information - drop.
	drop if FamId==.

	* tag_household = number of OTHER rows with the same FamId.
	duplicates tag FamId, gen(tag_household)

	* Singles did not have family - drop. Small measurement error: someone who
	* arrived as a family migrant and divorced before 1990 shows up as single.
	drop if tag_household==0

	* tag_hh2 = number of other household members with the same im_year. When it
	* equals tag_household the whole household arrived in year y together, so
	* nobody was in the country beforehand - drop.
	duplicates tag FamId im_year, gen(tag_hh2)
	drop if im_year==19`y' & tag_household>0 & tag_household==tag_hh2

	* ---- Step 3: remove household members who arrived after year y ----

	* Households are observed in 1990 only, so a relative who came after year y
	* must not count as family already in the country.
	preserve
		* Everyone in the household data who is not a year-y immigrant.
		keep if im_year==.
		keep LopNr
		duplicates drop LopNr, force

		* Look up their own migrations; keep only persons found in the register.
		merge 1:m LopNr using "RTB\migrationer", keepusing(Datum PostTyp) keep(match) nogenerate

		* Only immigrations (same PostTyp caveat as in step 1).
		tab PostTyp, gen(m)
		keep if m1==1
		drop PostTyp m1 m2

		tostring Datum, replace
		gen im_year2 = substr(Datum, 1, 4)
		destring im_year2, replace

		* Arrivals after 1990 are irrelevant because the household id is from 1990.
		drop if im_year2>1990

		* One row per person, chosen deterministically: the EARLIEST remaining
		* immigration, the same rule the sibling loop applies. The previous
		* duplicates drop kept whichever record happened to come first, so spec
		* could depend on row order for people with several immigrations.
		bysort LopNr (im_year2): keep if _n==1

		* spec = 1 marks a household member who arrived after year y.
		gen spec = 1 if im_year2>19`y'

		* Store only what the merge below needs, so no variable in the temp file
		* can clash with variables already in memory.
		keep LopNr spec
		save "E:\ProjData\UtplaceringsData8594\tempfamily", replace
	restore

	merge m:1 LopNr using "E:\ProjData\UtplaceringsData8594\tempfamily", keep(master match) nogenerate

	* Drop household members who immigrated after year y.
	drop if spec==1

	* ---- Step 4: redo the household calculation on the remaining members ----

	* Both counts must be recomputed. Recomputing only tag_household would flag
	* two people who arrived together in year y and whose only other household
	* member came later, although nobody was in the country before them.
	drop tag_household tag_hh2
	duplicates tag FamId, gen(tag_household)
	duplicates tag FamId im_year, gen(tag_hh2)

	* Family in the country: at least one remaining household member who did not
	* arrive in the same year.
	gen anhorig = 1 if im_year==19`y' & tag_household>tag_hh2

	keep LopNr anhorig

	* A very small number of persons appear more than once. Sorting on anhorig
	* puts 1 before missing, so a flagged row is never discarded arbitrarily.
	bysort LopNr (anhorig): keep if _n==1

	save "E:\ProjData\UtplaceringsData8594\hushåll`y'", replace
}


* Household indicator for arrival years 1990-1994. Each cohort is matched to
* the LISA file of its own arrival year, and one file per year is written
* with LopNr and anhorig.
foreach y in 90 91 92 93 94 {

	* Load the migration register.
	use "RTB\migrationer", clear

	* Immigration records only: m1 is the dummy for the first PostTyp category.
	tab PostTyp, gen(m)
	keep if m1==1
	drop PostTyp m1 m2

	* Split Datum into arrival year (characters 1-4) and month (characters 5-6).
	tostring Datum, replace
	gen im_year = substr(Datum, 1, 4)
	gen im_month = substr(Datum, 5, 2)
	destring im_year im_month, replace

	* Arrival cohort of this iteration.
	keep if im_year==19`y'

	* Persons with several immigrations in the year: retain only the record(s)
	* from their latest month. Persons with a single record are always retained.
	duplicates tag LopNr, gen(dup_n)
	bysort LopNr: egen mon_max = max(im_month)
	keep if im_month==mon_max | dup_n==0
	drop dup_n mon_max

	* Anyone still represented by more than one row is removed entirely, so
	* LopNr is unique before the 1:m merge.
	bysort LopNr: egen rows_n = count(LopNr)
	drop if rows_n > 1
	drop rows_n

	* Attach the household data for the same year and discard rows without a
	* household id.
	merge 1:m LopNr using "LISA\Lisa_19`y'"
	drop if FamId==.

	* hh_others = number of other rows sharing the FamId; zero means single.
	duplicates tag FamId, gen(hh_others)
	drop if hh_others==0

	* hh_sameyear = number of other household rows with the same im_year.
	* Cohort members whose entire household has their im_year came together
	* with their family and are removed.
	duplicates tag FamId im_year, gen(hh_sameyear)
	drop if im_year==19`y' & hh_others>0 & hh_others==hh_sameyear

	* Remaining cohort members are flagged as having family in the country.
	gen anhorig = 1 if im_year==19`y'

	keep LopNr anhorig

	* Remove the very small number of repeated persons.
	duplicates drop LopNr, force

	save "E:\ProjData\UtplaceringsData8594\hushåll`y'", replace
}

///NOW PICK OUT THOSE WHO HAD SIBLINGS IN THE COUNTRY

* Sibling indicator: for every arrival year 1985-1994, write a file with the
* immigrants of that year who had a sibling resident in Sweden that year and
* whose own (earliest) immigration was in an earlier year.
foreach y in 85 86 87 88 89 90 91 92 93 94 {

	* ---- Part A: list of everyone who immigrated in year y ----

	use "RTB\migrationer", clear

	* Keep immigrations only. m1 is the dummy for the first PostTyp category.
	* NOTE(review): assumes PostTyp has exactly two categories and that the
	* immigration category sorts first - confirm against the register codebook.
	tab PostTyp, gen(m)
	keep if m1==1
	drop PostTyp m1 m2

	* Year of arrival = characters 1-4 of Datum, month = characters 5-6.
	tostring Datum, replace
	gen im_year = substr(Datum, 1, 4)
	destring im_year, replace
	gen im_month = substr(Datum, 5, 2)
	destring im_month, replace

	keep if im_year==19`y'

	* A small share immigrated twice during the year: keep the record from the
	* latest month, then drop anyone who still has more than one row.
	duplicates tag LopNr, gen(tag)
	bysort LopNr: egen latestmonth = max(im_month)
	drop if im_month!=latestmonth & tag>0
	drop tag latestmonth
	bysort LopNr: egen cID = count(LopNr)
	drop if cID>1
	drop cID

	* Only the id is needed later. Dropping the other variables here prevents
	* this file's own im_year from colliding with the siblings' im_year in the
	* m:1 merge at the end of the loop.
	keep LopNr
	save "E:\ProjData\UtplaceringsData8594\syskon`y'", replace

	* ---- Part B: residents of year y with their earliest immigration year ----

	* Start from the population file for year y. The clear option makes the load
	* independent of whether the data in memory were just saved.
	use "RTB\RTB_19`y'", clear

	* File holds extremely few random duplicates - one row per person is enough.
	duplicates drop LopNr, force

	* Attach the migrations of these residents and keep matched persons only.
	merge 1:m LopNr using "RTB\migrationer", keepusing(Datum PostTyp) keep(match) nogenerate

	* Immigrations only (same PostTyp caveat as above).
	tab PostTyp, gen(m)
	keep if m1==1
	drop PostTyp m1 m2

	tostring Datum, replace
	gen im_year = substr(Datum, 1, 4)
	destring im_year, replace

	* One row per person: the earliest immigration year. Sorting on im_year
	* inside the bysort guarantees the order within person; a separate sort
	* followed by a plain bysort on LopNr does not.
	bysort LopNr (im_year): keep if _n==1

	* These persons play the role of the sibling from here on.
	rename LopNr LopNrSyskon
	keep LopNrSyskon im_year

	* ---- Part C: link siblings to the year-y immigrants ----

	* The sibling file supplies LopNr, the id of the person whose sibling this is.
	merge 1:m LopNrSyskon using "FlerGen\Syskon", keep(match) nogenerate
	drop Syskontyp

	* Keep only pairs where LopNr is someone who immigrated in year y (part A).
	merge m:1 LopNr using "E:\ProjData\UtplaceringsData8594\syskon`y'", keep(match) nogenerate

	* im_year is the sibling's earliest immigration year; only siblings who
	* immigrated before year y count.
	keep if im_year<19`y'

	* A person may have several such siblings; having at least one is all that matters.
	duplicates drop LopNr, force

	gen sibling_im=1
	keep LopNr sibling_im

	* Overwrites the intermediate list from part A with the final indicator file.
	save "E:\ProjData\UtplaceringsData8594\syskon`y'", replace

}
	
	

*log close
